import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
import seaborn as sns
import os

# Configure matplotlib so Chinese labels and minus signs display correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],   # font used to render Chinese characters
    'axes.unicode_minus': False,     # keep minus signs displayable with this font
})

# Make sure the figure output directory exists before anything is saved to it.
output_dir = "../fig"
os.makedirs(output_dir, exist_ok=True)

# Load the training data and discard the 'StandardHours' column.
data = pd.read_csv('../data/train.csv')
data = data.drop(columns=['StandardHours'])

# Pearson correlation matrix computed over the numeric columns only.
num_data = data.select_dtypes(include=['number'])
corr_matrix = num_data.corr(method='pearson')

# Draw the heatmap on an explicit figure/axes pair, annotating each cell with
# its coefficient rounded to two decimals; the colour scale is centred on 0.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    center=0,
    ax=ax,
)
ax.set_title('数值特征皮尔逊相关系数热力图（带数值）')
fig.tight_layout()

# Write the figure to disk, then close it to release the figure.
save_path = os.path.join(output_dir, "pearson_correlation_heatmap.png")
fig.savefig(save_path, dpi=300)
plt.close(fig)

print(f"✅ 热力图已成功保存到: {save_path}")